library(tidyverse)
library(lubridate)
library(plotly)
# Location of the benchmark log. The three parts are concatenated with
# paste0(), so PATH_FILES has to keep its leading and trailing slash.
RESEARCH_HOME <- "/home/and/Documents/PhD/Research"
PATH_FILES <- "/Scripts/R/Benchmarks/MultiAndSingleNode/R12/dblab/"
NOHUP_FILE <- "nohup10.tsv"

# Keep only the pipe-delimited records tagged |STAGES|.
lines <- readLines(paste0(RESEARCH_HOME, PATH_FILES, NOHUP_FILE))
lines <- grep("\\|STAGES\\|", lines, value = TRUE)

# One row per stage record: start time, node count, a sortable stage label and
# the stage duration in seconds.
stages0 <- tibble(value = lines) %>%
  separate(
    value,
    into = c(
      "Timestamp", "Title", "Nodes", "Cores", "StageID", "Stage", "Start",
      "End", "nTasks", "Runtime", "CPUTime", "InputBytes", "InputRecords",
      "ShuffleBytes", "ShuffleRecords", "ID"
    ),
    sep = "\\|"
  ) %>%
  # The ID field is dash-separated; only its third piece is kept.
  separate(ID, into = c(NA, NA, "ID"), sep = "-") %>%
  mutate(ID = as.numeric(ID), Stage = str_trim(Stage)) %>%
  mutate(
    uStart = parse_datetime(str_replace(str_trim(Start), "GMT", "")),
    uEnd = parse_datetime(str_replace(str_trim(End), "GMT", ""))
  ) %>%
  mutate(
    # Pin the unit explicitly: a bare `uEnd - uStart` lets difftime pick
    # secs/mins/hours automatically, which would silently change the scale of
    # a column that is later plotted as seconds.
    Duration = as.numeric(difftime(uEnd, uStart, units = "secs")),
    # NOTE(review): the fixed 7-hour shift presumably aligns these GMT
    # timestamps with the log timestamps used for the FF markers -- confirm.
    uStart = uStart - hours(7),
    # Zero-padded stage id prefix, e.g. "00012_<stage name>".
    Stage = paste0(
      str_pad(str_trim(StageID), pad = "0", side = "left", width = 5),
      "_",
      Stage
    )
  ) %>%
  select(uStart, Nodes, Stage, Duration) %>%
  arrange(uStart) %>%
  # Placeholder columns so these rows can be row-bound with the FF marker rows.
  mutate(Interval = -2, FFStage = "", Status = "")

head(stages0, n = 15)
# Second pass over the same log file: keep the records that contain a
# "|<digit 1-6>." field and split them into the FF marker columns.
lines <- grep(
  "\\|[1-6]\\.",
  readLines(paste0(RESEARCH_HOME, PATH_FILES, NOHUP_FILE)),
  value = TRUE
)

ff <- as_tibble(lines) %>%
  separate(
    value,
    into = c(
      "Timestamp", "Title", "ID", "Nodes", "Cores", "Status", "Time",
      "Stage", "Duration", "Load", "Interval"
    ),
    sep = "\\|"
  ) %>%
  # The ID field is dash-separated; only its third piece is kept.
  separate(ID, into = c(NA, NA, "ID"), sep = "-") %>%
  mutate(
    ID = as.numeric(ID),
    Stage = str_trim(Stage),
    Duration = as.numeric(Duration),
    # The log timestamp uses a comma before the fractional seconds; swap it
    # for a dot before parsing.
    uStart = parse_datetime(str_replace(Timestamp, ",", ".")),
    # FFStage is a copy of the (already trimmed) Stage label.
    FFStage = Stage,
    Status = str_trim(Status)
  ) %>%
  select(uStart, Nodes, Stage, Duration, Interval, FFStage, Status)

head(ff)
# Stack the stage rows on top of the FF marker rows, then order the combined
# table chronologically by start time.
stages <- arrange(rbind(stages0, ff), uStart)

head(stages, n = 100)
# Walk the chronologically ordered rows and stamp every ordinary row with the
# FFStage/Interval of the most recent START marker. An END marker clears the
# current values, so rows outside any START..END window receive "".
# START and END rows themselves are left unchanged.
n <- nrow(stages)

# Work on plain column vectors: indexing a tibble with [row, col] returns a
# 1x1 tibble rather than a scalar, and cell-by-cell tibble assignment is slow.
row_status <- stages$Status
row_ffstage <- stages$FFStage
row_interval <- stages$Interval

ffstage <- ""
interval <- ""
# seq_len() yields an empty sequence when there are no rows; 1:n would
# iterate over c(1, 0) and fail.
for (row in seq_len(n)) {
  if (row_status[row] == "START") {
    ffstage <- row_ffstage[row]
    interval <- row_interval[row]
  } else if (row_status[row] == "END") {
    ffstage <- ""
    interval <- ""
  } else {
    row_ffstage[row] <- ffstage
    row_interval[row] <- interval
  }
}

stages$FFStage <- row_ffstage
stages$Interval <- row_interval

head(stages, n = 100)
# Keep the rows that were assigned a non-empty Interval and whose Stage label
# contains an underscore, prefix FFStage with its interval number, and retain
# only the columns needed for plotting.
s <- stages %>%
  filter(Interval != "", grepl("_", Stage)) %>%
  mutate(FFStage = paste0(Interval, ".", FFStage)) %>%
  select(Nodes, Stage, Duration, FFStage, Interval)

head(s)
# Build the dodged bar chart of stage durations for one interval.
#
# data:        table with Stage, Duration, Nodes and Interval columns.
# interval_id: value matched against the Interval column.
# Returns the ggplot object (one bar per Stage, filled by Nodes).
plot_interval_durations <- function(data, interval_id) {
  ggplot(
    data = data %>% filter(Interval == interval_id),
    aes(x = Stage, y = Duration, fill = Nodes)
  ) +
    geom_bar(
      stat = "identity",
      position = position_dodge(width = 0.75),
      width = 0.7
    ) +
    # Stage labels are long, so rotate them to keep the axis readable.
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(x = "Stages", y = "Duration(s)")
}

# Interactive chart for interval 1.
p <- plot_interval_durations(s, 1)
ggplotly(p)

# Interactive chart for interval 2.
p <- plot_interval_durations(s, 2)
ggplotly(p)
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkocGxvdGx5KQoKUkVTRUFSQ0hfSE9NRSA9ICIvaG9tZS9hbmQvRG9jdW1lbnRzL1BoRC9SZXNlYXJjaCIKUEFUSF9GSUxFUyA9ICIvU2NyaXB0cy9SL0JlbmNobWFya3MvTXVsdGlBbmRTaW5nbGVOb2RlL1IxMi9kYmxhYi8iCk5PSFVQX0ZJTEUgPSAibm9odXAxMC50c3YiCmxpbmVzID0gcmVhZExpbmVzKHBhc3RlMChSRVNFQVJDSF9IT01FLCBQQVRIX0ZJTEVTLCBOT0hVUF9GSUxFKSkKbGluZXMgPSBsaW5lc1tncmVwbCgiXFx8U1RBR0VTXFx8IiwgbGluZXMpXQpzdGFnZXMwID0gYXNfdGliYmxlKGxpbmVzKSAlPiUKICBzZXBhcmF0ZSh2YWx1ZSwgaW50bz1jKCJUaW1lc3RhbXAiLCAiVGl0bGUiLCAiTm9kZXMiLCAiQ29yZXMiLCJTdGFnZUlEIiwgIlN0YWdlIiwgIlN0YXJ0IiwgIkVuZCIsICJuVGFza3MiLCAiUnVudGltZSIsICJDUFVUaW1lIiwgIklucHV0Qnl0ZXMiLCAiSW5wdXRSZWNvcmRzIiwgIlNodWZmbGVCeXRlcyIsICJTaHVmZmxlUmVjb3JkcyIsICJJRCIpLCBzZXA9IlxcfCIpICU+JQogIHNlcGFyYXRlKElELCBpbnRvPWMoTkEsIE5BLCAiSUQiKSwgc2VwPSItIikgJT4lCiAgbXV0YXRlKElEID0gYXMubnVtZXJpYyhJRCksIFN0YWdlID0gc3RyX3RyaW0oU3RhZ2UpKSAlPiUKICBtdXRhdGUodVN0YXJ0ID0gcGFyc2VfZGF0ZXRpbWUoc3RyX3JlcGxhY2Uoc3RyX3RyaW0oU3RhcnQpLCAiR01UIiwgIiIpKSwgCiAgICAgICAgIHVFbmQgPSBwYXJzZV9kYXRldGltZShzdHJfcmVwbGFjZShzdHJfdHJpbShFbmQpLCAiR01UIiwgIiIpKSkgJT4lCiAgbXV0YXRlKER1cmF0aW9uID0gYXMubnVtZXJpYyh1RW5kIC0gdVN0YXJ0KSwgCiAgICAgICAgIHVTdGFydCA9IHVTdGFydCAtIGhvdXJzKDcpLAogICAgICAgICBTdGFnZSA9IHBhc3RlMChzdHJfcGFkKHN0cl90cmltKFN0YWdlSUQpLCBwYWQ9IjAiLCBzaWRlPSJsZWZ0Iiwgd2lkdGg9NSksIl8iLFN0YWdlKSkgJT4lCiAgc2VsZWN0KHVTdGFydCwgTm9kZXMsIFN0YWdlLCBEdXJhdGlvbikgJT4lCiAgYXJyYW5nZSh1U3RhcnQpCnN0YWdlczAkSW50ZXJ2YWwgPSAtMgpzdGFnZXMwJEZGU3RhZ2UgPSAiIgpzdGFnZXMwJFN0YXR1cyA9ICIiCmhlYWQoc3RhZ2VzMCwgbj0xNSkKYGBgCgpgYGB7cn0KbGluZXMgPSByZWFkTGluZXMocGFzdGUwKFJFU0VBUkNIX0hPTUUsIFBBVEhfRklMRVMsIE5PSFVQX0ZJTEUpKQpsaW5lcyA9IGxpbmVzW2dyZXBsKCJcXHxbMS02XVxcLiIsIGxpbmVzKV0KZmYgPSBhc190aWJibGUobGluZXMpICU+JQogIHNlcGFyYXRlKHZhbHVlLCBpbnRvPWMoIlRpbWVzdGFtcCIsICJUaXRsZSIsICJJRCIsICJOb2RlcyIsICJDb3JlcyIsICJTdGF0dXMiLCAiVGltZSIsICJTdGFnZSIsICJEdXJhdGlvbiIsICJMb2FkIiwgIkludGVydmFsIiksIHNlcD0iXFx8IikgJT4lCiAg
c2VwYXJhdGUoSUQsIGludG89YyhOQSwgTkEsICJJRCIpLCBzZXA9Ii0iKSAlPiUKICBtdXRhdGUoSUQgPSBhcy5udW1lcmljKElEKSwgU3RhZ2UgPSBzdHJfdHJpbShTdGFnZSksIER1cmF0aW9uID0gYXMubnVtZXJpYyhEdXJhdGlvbikpICU+JQogIG11dGF0ZSh1U3RhcnQgPSBwYXJzZV9kYXRldGltZShzdHJfcmVwbGFjZShUaW1lc3RhbXAsICIsIiwgIi4iKSksIEZGU3RhZ2UgPSBTdGFnZSwgU3RhdHVzID0gc3RyX3RyaW0oU3RhdHVzKSkgJT4lCiAgc2VsZWN0KHVTdGFydCwgTm9kZXMsIFN0YWdlLCBEdXJhdGlvbiwgSW50ZXJ2YWwsIEZGU3RhZ2UsIFN0YXR1cykKCmhlYWQoZmYpCmBgYAoKYGBge3J9CnN0YWdlcyA9IHJiaW5kKHN0YWdlczAsIGZmKSAlPiUgYXJyYW5nZSh1U3RhcnQpCgpoZWFkKHN0YWdlcywgbj0xMDApCmBgYAoKYGBge3J9Cm4gPSBucm93KHN0YWdlcykKZmZzdGFnZSAgPSAiIgppbnRlcnZhbCA9ICIiCmZvcihyb3cgaW4gMTpuKXsKICBpZihzdGFnZXNbcm93LCAiU3RhdHVzIl0gPT0gIlNUQVJUIil7CiAgICBmZnN0YWdlICA9IHN0YWdlc1tyb3csICJGRlN0YWdlIl0KICAgIGludGVydmFsID0gc3RhZ2VzW3JvdywgIkludGVydmFsIl0KICB9IGVsc2UgewogICAgaWYoc3RhZ2VzW3JvdywgIlN0YXR1cyJdID09ICJFTkQiKXsKICAgICAgZmZzdGFnZSAgPSAiIgogICAgICBpbnRlcnZhbCA9ICIiCiAgICB9IGVsc2UgewogICAgICBzdGFnZXNbcm93LCAiRkZTdGFnZSJdID0gZmZzdGFnZQogICAgICBzdGFnZXNbcm93LCAiSW50ZXJ2YWwiXSA9IGludGVydmFsCiAgICB9CiAgfQp9CgpoZWFkKHN0YWdlcywgbj0xMDApCmBgYAoKYGBge3J9CnMgPSBzdGFnZXMgJT4lIGZpbHRlcihJbnRlcnZhbCAhPSAiIikgJT4lIGZpbHRlcihncmVwbCgiXyIsIFN0YWdlKSkgJT4lIAogIG11dGF0ZShGRlN0YWdlID0gcGFzdGUwKEludGVydmFsLCIuIixGRlN0YWdlKSkgJT4lIAogIHNlbGVjdChOb2RlcywgU3RhZ2UsIER1cmF0aW9uLCBGRlN0YWdlLCBJbnRlcnZhbCkKCmhlYWQocykKYGBgCgpgYGB7cn0KcCA9IGdncGxvdChkYXRhID0gcyAlPiUgZmlsdGVyKEludGVydmFsID09IDEpLCBhZXMoeCA9IFN0YWdlLCB5ID0gRHVyYXRpb24sIGZpbGwgPSBOb2RlcykpICsKICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIsIHBvc2l0aW9uPXBvc2l0aW9uX2RvZGdlKHdpZHRoID0gMC43NSksIHdpZHRoID0gMC43KSArIAogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gOTAsIGhqdXN0ID0gMSkpICsKICBsYWJzKHg9IlN0YWdlcyIsIHk9IkR1cmF0aW9uKHMpIikKZ2dwbG90bHkocCkKYGBgCgoKYGBge3J9CnAgPSBnZ3Bsb3QoZGF0YSA9IHMgJT4lIGZpbHRlcihJbnRlcnZhbCA9PSAyKSwgYWVzKHggPSBTdGFnZSwgeSA9IER1cmF0aW9uLCBmaWxsID0gTm9kZXMpKSArCiAgZ2VvbV9iYXIoc3RhdD0iaWRlbnRpdHkiLCBwb3NpdGlvbj1wb3NpdGlvbl9kb2RnZSh3aWR0aCA9IDAuNzUpLCB3aWR0aCA9
IDAuNykgKyAKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCBoanVzdCA9IDEpKSArCiAgbGFicyh4PSJTdGFnZXMiLCB5PSJEdXJhdGlvbihzKSIpCmdncGxvdGx5KHApCmBgYAo=